#########################
# 4. Ideology and vote choice analysis
#########################


# load packages
library(oddsratio)
library(ggstance)
library(ggeffects)
library(broom)
library(jtools)
library(dplyr)
library(haven)
library(ggplot2)
library(stringr)
library(sjPlot)

# load data
# Both .RData files are restored into the current workspace. The rest of this
# script uses `cces12`, `dem_gub`, `gop_gub`, `dem_house` and `gop_house`
# without creating them, so they are presumably supplied by these two files
# (TODO confirm which file provides which object).
load('3_output.RData')
load('cand_cfscores.RData')


###

# 1. Merge candidate CFscores into state / districts

# a. for governors

# Copy each party's candidate CFscore and total receipts into party-specific
# column names so both parties can sit side by side in cces12 after merging.
dem_gub$dem_gub_cfscore <- dem_gub$recipient.cfscore
gop_gub$gop_gub_cfscore <- gop_gub$recipient.cfscore
dem_gub$dem_gub_spend <- dem_gub$total.receipts
gop_gub$gop_gub_spend <- gop_gub$total.receipts

# Left joins on state: every respondent row is kept, candidate rows with no
# matching respondent are dropped. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be overwritten (for instance by an object
# restored with load()).
# NOTE(review): dem_gub / gop_gub are not de-duplicated by state here; if either
# holds more than one row per state, merge() will duplicate respondents.
cces12 <- merge(cces12, dem_gub[, c('state', 'dem_gub_cfscore', 'dem_gub_spend')],
                by.x = 'StateAbbr', by.y = 'state', all.x = TRUE, all.y = FALSE)
cces12 <- merge(cces12, gop_gub[, c('state', 'gop_gub_cfscore', 'gop_gub_spend')],
                by.x = 'StateAbbr', by.y = 'state', all.x = TRUE, all.y = FALSE)

# b. for House

# remove districts with more than one D/R in general (none in states under study)
# Only the first row per district is kept, so each merge below adds at most one
# candidate row per respondent.
dem_house <- dem_house[!duplicated(dem_house$district), ]
gop_house <- gop_house[!duplicated(gop_house$district), ]

# record CFscore and disbursements
# NOTE(review): the spend columns are filled from total.receipts, not from a
# disbursements field - confirm this is the intended measure.
dem_house$dem_house_cfscore <- dem_house$recipient.cfscore
gop_house$gop_house_cfscore <- gop_house$recipient.cfscore
dem_house$dem_house_spend <- dem_house$total.receipts
gop_house$gop_house_spend <- gop_house$total.receipts

# collate dist variable: state abbreviation followed by the district number
# left-padded with zeros to two characters
cces12$district <- paste0(cces12$StateAbbr, str_pad(cces12$cdid, 2, pad = "0"))

# merge (left joins: keep every respondent, drop unmatched candidate rows).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# overwritten (for instance by an object restored with load()).
cces12 <- merge(cces12, dem_house[, c('district', 'dem_house_cfscore', 'dem_house_spend')],
                by = 'district', all.x = TRUE, all.y = FALSE)
cces12 <- merge(cces12, gop_house[, c('district', 'gop_house_cfscore', 'gop_house_spend')],
                by = 'district', all.x = TRUE, all.y = FALSE)


###

# 2. Prepare dataset for analysis

# vote choice
# CC411 == 2 is coded 1, CC411 == 1 is coded 0, everything else NA
# (presumably 2 = Republican and 1 = Democratic candidate; confirm in codebook).
cces12$gub_vote_rep <- ifelse(cces12$CC411 == 2, 1,
                              ifelse(cces12$CC411 == 1, 0, NA))

# The House coding below treats CC412 == 1 as the Democratic vote and
# CC412 == 2 as the Republican vote, which only holds when candidate 1 is the
# Democrat AND candidate 2 is the Republican. The vote is therefore set to NA
# when EITHER condition fails (the previous `&` masked a contest only when both
# failed, letting e.g. a Democrat-vs-third-party race be coded as D vs R).
# NOTE(review): assumes CC412 codes 1/2 refer to HouseCand1/HouseCand2 - confirm.
cces12$house_vote_rep <- ifelse(cces12$HouseCand1Party != 'Democratic' | cces12$HouseCand2Party != 'Republican', NA,
                                ifelse(cces12$CC412 == 2, 1,
                                       ifelse(cces12$CC412 == 1, 0, NA)))

# GOP ideological advantage
cces12$gop_gub_cfscore[cces12$StateAbbr == 'WA'] <- 0.977 # Rob McKenna was listed as AG in candidate file
# Donors use their observed CFscore, everyone else the imputed one.
cces12$voter_CFscore <- ifelse(cces12$donor == 1, cces12$true_CFscore, cces12$imputed_CFscore)
# Distance to the Democrat minus distance to the Republican: positive values
# mean the respondent is closer to the Republican candidate.
cces12$gub_rep_ideo_adv <- abs(cces12$voter_CFscore - cces12$dem_gub_cfscore) - abs(cces12$voter_CFscore - cces12$gop_gub_cfscore)
cces12$house_rep_ideo_adv <- abs(cces12$voter_CFscore - cces12$dem_house_cfscore) - abs(cces12$voter_CFscore - cces12$gop_house_cfscore)

# control variables
# 'Independent' is listed first so it becomes the reference level in the models.
cces12$party_id <- factor(cces12$party_id, levels = c('Independent','Democrat','Republican'))
cces12$bachelors <- ifelse(cces12$educ >= 5, 1, 0)
# race, gender and birthyr enter the models as they are (race via `race == 1`),
# so no recoding is needed for them here.

# spending variable (Republican minus Democratic receipts, in millions)
cces12$gub_rep_spend_adv <- (cces12$gop_gub_spend - cces12$dem_gub_spend) / 1000000
cces12$gub_rep_spend_adv[cces12$StateAbbr == 'WA'] <- 1.8 # Rob McKenna outraised Inslee by 1.8M, per Seattle Times https://www.seattletimes.com/seattle-news/tab-for-governors-race-46-million/
cces12$house_rep_spend_adv <- (cces12$gop_house_spend - cces12$dem_house_spend) / 1000000

# weights
cces12$survey_wt <- cces12$V103

# Reduce dataset to 12 states with gub elections, relevant variables
gub_states <- c('WA','MT','UT','ND','WI','IN','MO','WV','NC','DE','NH','VT')
analysis <- cces12[cces12$StateAbbr %in% gub_states,]


###

# 3. Analysis

# a. Run models

# Survey-weighted logit of the gubernatorial vote on the Republican ideological
# advantage, party ID, spending advantage, demographic controls and StateAbbr.
gub.fit <- glm(
  gub_vote_rep ~ gub_rep_ideo_adv + party_id + gub_rep_spend_adv + (race == 1) +
    gender + bachelors + birthyr + StateAbbr,
  family = "binomial",
  data = analysis,
  weights = survey_wt
)

# Same specification for the House vote, with district in place of StateAbbr.
# The data are limited to respondents whose gubernatorial vote is non-missing.
house.fit <- glm(
  house_vote_rep ~ house_rep_ideo_adv + party_id + house_rep_spend_adv + (race == 1) +
    gender + bachelors + birthyr + district,
  family = "binomial",
  data = analysis[!is.na(analysis$gub_vote_rep), ],
  weights = survey_wt
)

# b. Review results, get obs counts
summary(gub.fit)
NROW(gub.fit$model)   # rows actually used in the gubernatorial model
summary(house.fit)
NROW(house.fit$model) # rows actually used in the House model

# are ideology coefficients different?
# z statistic for the gap between the two ideology coefficients, treating the
# two estimates as independent. Estimates and variances are read from the fitted
# models instead of being typed in by hand, so the test cannot go stale when the
# data or the specification change.
# NOTE(review): the hand-typed version used .9551 - .8924; the order here
# (gubernatorial minus House) is assumed - only the sign of z_diff depends on it.
z_diff <- (coef(gub.fit)[['gub_rep_ideo_adv']] - coef(house.fit)[['house_rep_ideo_adv']]) /
  sqrt(vcov(gub.fit)['gub_rep_ideo_adv', 'gub_rep_ideo_adv'] +
         vcov(house.fit)['house_rep_ideo_adv', 'house_rep_ideo_adv'])


# c. Convert coefs to odds ratios, save in results df

# Tidy a fitted model and append odds-ratio columns:
#   or       exponentiated coefficient
#   var.diag coefficient variance (diagonal of the model's vcov matrix)
#   or.se    delta-method standard error of the odds ratio
#   lower / upper  or -/+ 1.96 * or.se, rounded to two decimals
#   ci       "lower, upper" as a single string
odds_ratio_table <- function(fit) {
  mutate(
    tidy(fit),
    or = exp(estimate),
    var.diag = diag(vcov(fit)),
    or.se = sqrt(or^2 * var.diag),
    lower = round(-1.96*or.se + or, 2),
    upper = round(1.96*or.se + or, 2),
    ci = paste(lower, upper, sep = ', ')
  )
}

gub.fit_results <- odds_ratio_table(gub.fit)
house.fit_results <- odds_ratio_table(house.fit)

# Put the first 11 terms of each model side by side (gubernatorial columns
# first, then House) and write them out.
report_cols <- c('term', 'or', 'ci')
results <- cbind(
  gub.fit_results[1:11, report_cols],
  house.fit_results[1:11, report_cols]
)
write.csv(results, 'votechoice_results.csv')


# Clean up: remove every object from the current workspace
rm(list = ls())
